// Start from a clean session: wipe data/programs/labels in memory and close
// any log left open by an earlier run (capture: no error if none is open).
clear all
cap log close

// Run the two read-in do-files for the demographics extract. The global
// $dofiles must already be defined by the caller.
// Forward slashes are used throughout: Stata accepts them on every platform,
// whereas a backslash is not a path separator on macOS/Linux and doubles as
// Stata's macro-escape character.
do "$dofiles/02_model/01_pre_est/readin/demographics_2_readin"
do "$dofiles/02_model/01_pre_est/readin/demographics_2_format"

//generation of uniqid variable
// uniqid = ER30001*1000 + ER30002. Stored as long rather than the default
// float: a float holds integers exactly only up to 16,777,216, so an integer
// type keeps the identifier exact whatever the range of ER30001.
gen long uniqid = ER30001*1000 + ER30002
duplicates report uniqid //no duplicates!
drop ER30002 ER30000
ren ER32000 sex

//prune variables (everything except uniqid) based on the text of their labels
ds uniqid, not
local candidates `r(varlist)'
foreach v of local candidates {

	local vlab : variable label `v'

	//label mentions a sequence/release/interview number
	local admin = (strpos("`vlab'", "SEQUENCE") > 0) | (strpos("`vlab'", "RELEASE") > 0) | (strpos("`vlab'", "INTERVIEW") > 0)

	//label contains a space followed by 6, 7, 8 or 9
	//(presumably a two-digit year from the 1960s-1990s -- confirm)
	local early = regexm("`vlab'", " [6-9]")

	if `admin' | `early' {
		drop `v'
	}
}

***********big renaming loop; use variable labels
// For every variable except uniqid: read the two-digit year at the end of the
// label, expand it to four digits, and rename the variable to age<year>,
// educ<year> or relate<year> according to keywords in the label. Variables
// whose label matches none of the keywords keep their name.
ds uniqid, not
foreach var in `r(varlist)' {

	//fetch label
	local lab : variable label `var'

	//last word of the label (the two-digit year for individual-file variables)
	local num = word(`"`lab'"', -1)

	//skip labels that do not end in exactly two digits, and V4373.
	//The digit test keeps a two-letter last word from reaching the numeric
	//comparison below, where it would be read as a variable name.
	if !regexm(`"`num'"', "^[0-9][0-9]$") | "`var'" == "V4373" {
		continue
	}

	//expand to a four-digit year. Pivot at 68: suffixes 00-67 are 2000s,
	//68-99 are 1900s (assumes no wave before 1968, the first PSID year --
	//confirm). A pivot at 20 would send "21" to 1921 and leave "20" with
	//whatever year the previous iteration set.
	if `num' < 68 {
		local year = 2000 + `num'
	}
	else {
		local year = 1900 + `num'
	}

	****rename according to label contents
	//first matching category wins, so a variable is never renamed twice
	if strpos(`"`lab'"', "AGE OF") {
		//age
		ren `var' age`year'
	}
	else if strpos(`"`lab'"', "EDUC") | strpos(`"`lab'"', "GRADE") | strpos(`"`lab'"', "SCHL") | strpos(`"`lab'"', "GRAD F") {
		//educ
		ren `var' educ`year'
	}
	else if strpos(`"`lab'"', "RELATION") {
		//relation to head
		ren `var' relate`year'
	}
}

//diagnostic: print a "ren <name> " stub for every variable that is not uniqid,
//sex, or one of the age*/educ*/relate* series, then describe those variables
ds uniqid sex age* educ* relate*, not
local leftover `r(varlist)'
foreach v of local leftover {
	noi di "ren `v' "
}
describe `leftover'

//race variables: one row per wave, laid out as
//   <year> <source variable for races_> <source variable for raceh_>
local race_map ///
	1985 V12293  V11938  ///
	1986 V13500  V13565  ///
	1987 V14547  V14612  ///
	1988 V16021  V16086  ///
	1989 V17418  V17483  ///
	1990 V18749  V18814  ///
	1991 V20049  V20114  ///
	1992 V21355  V21420  ///
	1993 V23212  V23276  ///
	1994 ER3883  ER3944  ///
	1995 ER6753  ER6814  ///
	1996 ER8999  ER9060  ///
	1997 ER11760 ER11848 ///
	1999 ER15836 ER15928 ///
	2001 ER19897 ER19989 ///
	2003 ER23334 ER23426 ///
	2005 ER27297 ER27393 ///
	2007 ER40472 ER40565 ///
	2009 ER46449 ER46543 ///
	2011 ER51810 ER51904 ///
	2013 ER57549 ER57659 ///
	2015 ER64671 ER64810 ///
	2017 ER70744 ER70882 ///
	2019 ER76752 ER76897 ///
	2021 ER81017 ER81144

//peel off one (year, races source, raceh source) triple at a time and rename
while "`race_map'" != "" {
	gettoken wave    race_map : race_map
	gettoken src_s   race_map : race_map
	gettoken src_h   race_map : race_map
	rename `src_s' races_`wave'
	rename `src_h' raceh_`wave'
}

//retain the identifier, sex and the yearly series; then remove every variable
//whose name contains "21"
keep uniqid sex age* race* educ* relate*
drop *21*

//wide -> long: one row per uniqid x year; the trailing digits of each stub
//become the new variable year
reshape long age educ relate races_ raceh_, i(uniqid) j(year)

//only rows from 2001 onward are kept, ordered by person and year
keep if year >= 2001
sort uniqid year

*****restrict to a few values of relate variable and standardize
//fold the two-digit codes 10/20/30 onto 1/2/3
foreach code in 1 2 3 {
	replace relate = `code' if relate == `code'*10
}

//keep only rows coded 1, 2, 3 or system missing
keep if inlist(relate, 1, 2, 3) | relate == .

//replace relate and educ by freshly generated copies: gen does not carry over
//the variable label or value label, and the copy takes the default storage type
foreach v in relate educ {
	gen temp = `v'
	drop `v'
	ren temp `v'
}

//race coding
// race is taken from raceh_ on rows with relate == 1 and from races_ on rows
// with relate == 2; every other row stays missing. The reshaped variables are
// referred to by their full names (raceh_, races_) so the code does not depend
// on variable-name abbreviation being switched on.
gen race = .
replace race = raceh_ if relate == 1
replace race = races_ if relate == 2

//collapse every non-missing code other than 1 and 2 into "other";
//!missing() also leaves extended missing values (.a-.z) untouched
replace race = 3 if !missing(race) & (race > 2 | race == 0)
lab def race_lab 1 "White" 2 "Black" 3 "Other"
lab val race race_lab
keep uniqid year sex age educ relate race

//fill in missing race data within person (rows are ordered by uniqid year)
//step 1: take the value from the person's next row. replace works through the
//rows in order, so race[_n+1] is always the not-yet-updated value and this
//step reaches back by one row only.
replace race = race[_n+1] if missing(race) & !missing(race[_n+1]) & uniqid == uniqid[_n+1]
//step 2: carry the previous row's value forward. race[_n-1] has already been
//updated when row _n is processed, so this cascades down each person's rows.
replace race = race[_n-1] if missing(race) & !missing(race[_n-1]) & uniqid == uniqid[_n-1]
//step 3: anything still missing lies before the person's first filled row.
//Reverse the year order so the same carry cascades backward over those rows,
//then restore the uniqid/year ordering. Rows filled by steps 1-2 are unchanged.
gsort uniqid -year
replace race = race[_n-1] if missing(race) & !missing(race[_n-1]) & uniqid == uniqid[_n-1]
sort uniqid year

//still going to need to fill in race from race of parents, most likely

save "$temp/psid_demographics", replace
//end of do-file